Relación con muertes

Autor/a

Brian Norman Peña-Calero

Fecha de publicación

28 de febrero de 2024

1 Cargar paquetes

Código
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.0     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
Código
library(lme4)
Loading required package: Matrix

Attaching package: 'Matrix'

The following objects are masked from 'package:tidyr':

    expand, pack, unpack
Código
library(lmerTest)

Attaching package: 'lmerTest'

The following object is masked from 'package:lme4':

    lmer

The following object is masked from 'package:stats':

    step
Código
library(merDeriv)
Loading required package: nonnest2
This is nonnest2 0.5-6.
nonnest2 has not been tested with all combinations of supported model classes.
Loading required package: sandwich
Loading required package: lavaan
This is lavaan 0.6-17
lavaan is FREE software! Please report any bugs.
Código
library(performance)
library(parameters)
library(glmmTMB)
library(MASS)

Attaching package: 'MASS'

The following object is masked from 'package:dplyr':

    select
Código
# options(scipen = 9)

conflicted::conflicts_prefer(dplyr::filter)
[conflicted] Will prefer dplyr::filter over any other package.
Código
conflicted::conflicts_prefer(dplyr::select)
[conflicted] Will prefer dplyr::select over any other package.
Código
conflicted::conflicts_prefer(lmerTest::lmer)
[conflicted] Will prefer lmerTest::lmer over any other package.
Código
options(parameters_labels = TRUE) 

2 Importar datos

Código
gbmt_fit_final <- readRDS("01_data/processed/gbmt_fit_final.rds")
deaths_by_year <- read_csv(file = "01_data/processed/deaths_by_year.csv")
Rows: 6945 Columns: 4
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (1): ISO2
dbl (3): SALID1, YEAR, deaths

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

3 Formato de datos

Código
gbmt_log_3 <- gbmt_fit_final %>% 
  filter(scale_name == "logarithmic", ng == 3) %>% 
  slice(1)

gbmt_log_3 <- gbmt_log_3$gbmt_fit_total[[1]]


gbmt_log_4 <- gbmt_fit_final %>% 
  filter(scale_name == "logarithmic", ng == 4) %>% 
  slice(1)

gbmt_log_4 <- gbmt_log_4$gbmt_fit_total[[1]]


gbmt_log <- gbmt_log_3 %>% 
  bind_rows(gbmt_log_4,
            .id = "Clusters") %>% 
  mutate(Clusters = case_match(Clusters,
                               "1" ~ 3,
                               "2" ~ 4))

Unificar datos:

Código
gbmt_log_merged <- gbmt_log %>%
  select(Clusters, ISO2, pubsalid1, SALID1, YEAR = year, population_imp_norm:group) %>% 
  inner_join(
    deaths_by_year
  ) %>% 
  mutate(deaths_per_100k = (deaths / population_imp) * 100000)
Joining with `by = join_by(ISO2, SALID1, YEAR)`

4 Tabla descriptiva

Código
gbmt_log_descriptive <- gbmt_log %>%
  filter(Clusters == 4) %>% 
  select(country, ISO2, SALID1, YEAR = year, population_imp:group) %>% 
  inner_join(
    deaths_by_year
  ) %>% 
  mutate(
    country = str_to_title(country),
    deaths_per_100k = (deaths / population_imp) * 100000
  ) %>% 
  select(-c(ISO2, SALID1, YEAR, deaths)) %>% 
  relocate(group, .after = deaths_per_100k)
Joining with `by = join_by(ISO2, SALID1, YEAR)`
Código
labelled::var_label(gbmt_log_descriptive) <- list(
  country = "Country",
  population_imp = "Population",
  bectuareal1ux_imp = "Urban Area",
  deaths_per_100k    = "Deaths per 100K",
  group = "Clusters"
)
Código
library(gtsummary)
tab1 <- gbmt_log_descriptive %>% 
  tbl_summary(
    by = group,
    percent = "row"
  ) %>% 
  modify_header(label = "") %>%
  bold_labels() %>% 
  add_overall(last = TRUE)

tab1
1, N = 1,7491 2, N = 9641 3, N = 1,0591 4, N = 2141 Overall, N = 3,9861
Country




    Argentina 49 (50%) 14 (14%) 35 (36%) 0 (0%) 98 (100%)
    Brazil 1,200 (54%) 576 (26%) 384 (17%) 48 (2.2%) 2,208 (100%)
    Chile 128 (44%) 64 (22%) 64 (22%) 32 (11%) 288 (100%)
    Colombia 190 (38%) 57 (12%) 209 (42%) 38 (7.7%) 494 (100%)
    Costa Rica 16 (100%) 0 (0%) 0 (0%) 0 (0%) 16 (100%)
    El Salvador 15 (33%) 15 (33%) 15 (33%) 0 (0%) 45 (100%)
    Guatemala 7 (100%) 0 (0%) 0 (0%) 0 (0%) 7 (100%)
    Mexico 144 (18%) 224 (27%) 352 (43%) 96 (12%) 816 (100%)
    Panama 0 (0%) 14 (100%) 0 (0%) 0 (0%) 14 (100%)
Population 196,337 (139,475, 274,233) 140,618 (120,151, 208,516) 414,158 (211,435, 850,431) 176,715 (134,794, 253,010) 200,752 (135,821, 349,456)
Urban Area 2,742 (1,486, 5,111) 1,726 (974, 2,953) 5,706 (2,691, 10,468) 1,176 (678, 3,384) 2,733 (1,442, 5,966)
Deaths per 100K 765 (639, 1,063) 706 (575, 997) 562 (457, 779) 574 (490, 940) 701 (553, 942)
1 n (%); Median (IQR)
Código
tab2 <- gbmt_log_descriptive %>% 
  tbl_summary(
    by = country,
    percent = "row"
  ) %>% 
  modify_header(label = "") %>%
  bold_labels() %>% 
  add_overall(last = TRUE)

tab2
Argentina, N = 981 Brazil, N = 2,2081 Chile, N = 2881 Colombia, N = 4941 Costa Rica, N = 161 El Salvador, N = 451 Guatemala, N = 71 Mexico, N = 8161 Panama, N = 141 Overall, N = 3,9861
Population 281,699 (127,245, 555,445) 186,154 (130,094, 302,125) 180,732 (143,769, 232,177) 235,196 (141,438, 366,436) 294,932 (287,559, 302,885) 252,434 (247,207, 303,155) 787,981 (786,722, 789,242) 244,510 (141,902, 870,984) 467,866 (464,203, 472,790) 200,752 (135,821, 349,456)
Urban Area 4,330 (3,327, 11,983) 2,616 (1,540, 4,918) 1,547 (981, 2,373) 1,072 (451, 2,189) 31,222 (30,766, 31,689) 1,154 (195, 11,561) 20,671 (20,520, 20,821) 5,055 (2,678, 11,149) 13,553 (12,304, 14,368) 2,733 (1,442, 5,966)
Deaths per 100K 1,158 (807, 2,390) 697 (583, 856) 662 (587, 750) 612 (517, 851) 3,290 (2,950, 3,582) 719 (531, 2,581) 2,293 (2,199, 2,405) 791 (472, 1,266) 1,539 (1,373, 1,711) 701 (553, 942)
Clusters









    1 49 (2.8%) 1,200 (69%) 128 (7.3%) 190 (11%) 16 (0.9%) 15 (0.9%) 7 (0.4%) 144 (8.2%) 0 (0%) 1,749 (100%)
    2 14 (1.5%) 576 (60%) 64 (6.6%) 57 (5.9%) 0 (0%) 15 (1.6%) 0 (0%) 224 (23%) 14 (1.5%) 964 (100%)
    3 35 (3.3%) 384 (36%) 64 (6.0%) 209 (20%) 0 (0%) 15 (1.4%) 0 (0%) 352 (33%) 0 (0%) 1,059 (100%)
    4 0 (0%) 48 (22%) 32 (15%) 38 (18%) 0 (0%) 0 (0%) 0 (0%) 96 (45%) 0 (0%) 214 (100%)
1 Median (IQR); n (%)
Código
tab1 %>%
  as_flex_table() %>%
  flextable::save_as_docx(path = "./02_output/tables/descriptive1.docx")

tab2 %>%
  as_flex_table() %>%
  flextable::save_as_docx(path = "./02_output/tables/descriptive2.docx")

5 Gráfico inicial de promedio de muertes

Código
gbmt_log_merged %>% 
  group_by(Clusters, group, YEAR) %>% 
  summarise(
    deaths_per_100k = mean(deaths_per_100k)
  ) %>% 
  ungroup() %>% 
  ggplot(aes(x = YEAR, y = deaths_per_100k, color = as.factor(group))) +
  geom_line(aes(group = group)) +
  geom_point() +
  
  labs(title = "Promedio de Muertes por 100,000 habitantes por Grupo a lo largo del Tiempo",
       x = "Año", y = "Muertes por 100,000 habitantes", color = "Grupo") +
  facet_wrap(vars(Clusters)) +
  theme_minimal()
`summarise()` has grouped output by 'Clusters', 'group'. You can override using
the `.groups` argument.

Código
ggsave(
  filename = "02_output/plots/promedio_muertes_group_log.png",
  dpi = 300,
  bg = "white"
)

6 Modelo lineal mixto

6.1 Modelo 1 (sin offset)

6.1.1 Lineal

Código
model_3 <- lmer(
  deaths_per_100k ~ YEAR * group + (1|SALID1), 
  data = gbmt_log_merged %>% filter(Clusters == 3)
)

performance(model_3) %>% print_html()
AIC AICc BIC R2_conditional R2_marginal ICC RMSE Sigma
51062.24 51062.28 51112.57 0.98 0.05 0.98 115.89 119.79
Código
parameters(model_3) %>% print_html()
Model Summary
Parameter Coefficient SE 95% CI t(3978) p
Fixed Effects
(Intercept) -15204.66 1535.92 (-18215.92, -12193.39) -9.90 < .001
YEAR 7.94 0.76 (6.44, 9.43) 10.39 < .001
group (2) -19921.91 1889.31 (-23626.03, -16217.80) -10.54 < .001
group (3) -4769.63 2522.78 (-9715.70, 176.44) -1.89 0.059
YEAR * group (2) 10.08 0.94 (8.24, 11.93) 10.73 < .001
YEAR * group (3) 2.40 1.25 (-0.06, 4.86) 1.91 0.056
Random Effects
SD (Intercept: SALID1) 753.94
SD (Residual) 119.79
Código
parameters(model_3, standardize = "basic") %>% print_html()
Standardizing coefficients only works for fixed effects of the mixed
  model.
Model Summary
Parameter Std. Coef. SE 95% CI t(3978) p
(Intercept) 0.00 0.00 (0.00, 0.00) -9.90 < .001
YEAR 0.05 5.19e-03 (0.04, 0.06) 10.39 < .001
group (2) -14.12 1.34 (-16.75, -11.49) -10.54 < .001
group (3) -2.53 1.34 (-5.15, 0.09) -1.89 0.059
YEAR * group (2) 14.35 1.34 (11.73, 16.97) 10.73 < .001
YEAR * group (3) 2.56 1.34 (-0.06, 5.17) 1.91 0.056
Código
model_4 <- lmer(
  deaths_per_100k ~ YEAR * group + (1|SALID1), 
  data = gbmt_log_merged %>% filter(Clusters == 4)
)

performance(model_4) %>% print_html()
AIC AICc BIC R2_conditional R2_marginal ICC RMSE Sigma
51031.21 51031.27 51094.12 0.98 0.04 0.98 115.56 119.47
Código
parameters(model_4) %>% print_html()
Model Summary
Parameter Coefficient SE 95% CI t(3976) p
Fixed Effects
(Intercept) -29967.87 1235.35 (-32389.85, -27545.89) -24.26 < .001
YEAR 15.42 0.61 (14.22, 16.63) 25.11 < .001
group (2) -9544.70 2079.32 (-13621.32, -5468.07) -4.59 < .001
group (3) 16889.04 1990.68 (12986.19, 20791.90) 8.48 < .001
group (4) 11795.16 3654.48 (4630.33, 18959.99) 3.23 0.001
YEAR * group (2) 4.78 1.03 (2.75, 6.80) 4.62 < .001
YEAR * group (3) -8.55 0.99 (-10.49, -6.61) -8.64 < .001
YEAR * group (4) -6.02 1.82 (-9.59, -2.46) -3.32 < .001
Random Effects
SD (Intercept: SALID1) 757.19
SD (Residual) 119.47
Código
parameters(model_4, standardize = "basic") %>% print_html()
Standardizing coefficients only works for fixed effects of the mixed
  model.
Model Summary
Parameter Std. Coef. SE 95% CI t(3976) p
(Intercept) 0.00 0.00 (0.00, 0.00) -24.26 < .001
YEAR 0.10 4.17e-03 (0.10, 0.11) 25.11 < .001
group (2) -5.84 1.27 (-8.33, -3.34) -4.59 < .001
group (3) 10.65 1.26 (8.19, 13.11) 8.48 < .001
group (4) 3.80 1.18 (1.49, 6.10) 3.23 0.001
YEAR * group (2) 5.86 1.27 (3.38, 8.35) 4.62 < .001
YEAR * group (3) -10.83 1.25 (-13.29, -8.37) -8.64 < .001
YEAR * group (4) -3.89 1.17 (-6.19, -1.59) -3.32 < .001

6.1.2 Poisson

Código
model_3_poisson <- glmer(
  deaths ~ YEAR * group + (1|SALID1), 
  data = gbmt_log_merged %>% filter(Clusters == 3),
  family = poisson(link = "log")
)
Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
 - Rescale variables?;Model is nearly unidentifiable: large eigenvalue ratio
 - Rescale variables?
Código
performance(model_3_poisson) %>% print_html()
AIC AICc BIC R2_conditional R2_marginal ICC RMSE Sigma Score_log Score_spherical
93071.60 93071.62 93115.63 1.00 0.07 1.00 518.86 1 -11.31 0.01
Código
parameters(model_3_poisson) %>% print_html()
Model Summary
Parameter Coefficient SE 95% CI z p
Fixed Effects
(Intercept) -28.20 0.21 (-28.61, -27.78) -132.98 < .001
YEAR 0.02 8.81e-05 (0.02, 0.02) 204.21 < .001
group (2) -3.42 0.27 (-3.94, -2.90) -12.85 < .001
group (3) -15.05 0.50 (-16.04, -14.07) -29.87 < .001
YEAR * group (2) 1.53e-03 1.12e-04 (1.31e-03, 1.75e-03) 13.70 < .001
YEAR * group (3) 7.12e-03 2.32e-04 (6.67e-03, 7.58e-03) 30.67 < .001
Random Effects
SD (Intercept: SALID1) 0.97
Código
parameters(model_3_poisson, exponentiate = TRUE) %>% print_html()
Model Summary
Parameter Coefficient SE 95% CI z p
Fixed Effects
(Intercept) 5.67e-13 1.20e-13 (3.74e-13, 8.59e-13) -132.98 < .001
YEAR 1.02 8.97e-05 (1.02, 1.02) 204.21 < .001
group (2) 0.03 8.71e-03 (0.02, 0.06) -12.85 < .001
group (3) 2.90e-07 1.46e-07 (1.08e-07, 7.79e-07) -29.87 < .001
YEAR * group (2) 1.00 1.12e-04 (1.00, 1.00) 13.70 < .001
YEAR * group (3) 1.01 2.34e-04 (1.01, 1.01) 30.67 < .001
Random Effects
SD (Intercept: SALID1) 0.97
Código
model_4_poisson <- glmer(
  deaths ~ YEAR * group + (1|SALID1), 
  data = gbmt_log_merged %>% filter(Clusters == 4),
  family = poisson(link = "log")
)
Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
 - Rescale variables?;Model is nearly unidentifiable: large eigenvalue ratio
 - Rescale variables?
Código
performance(model_4_poisson) %>% print_html()
AIC AICc BIC R2_conditional R2_marginal ICC RMSE Sigma Score_log Score_spherical
91102.05 91102.09 91158.66 1.00 0.08 1.00 505.56 1 -11.07 0.01
Código
parameters(model_4_poisson) %>% print_html()
Model Summary
Parameter Coefficient SE 95% CI z p
Fixed Effects
(Intercept) -28.75 0.17 (-29.09, -28.41) -165.04 < .001
YEAR 0.02 7.40e-05 (0.02, 0.02) 245.19 < .001
group (2) -17.82 0.37 (-18.55, -17.09) -47.76 < .001
group (3) 0.12 0.28 (-0.42, 0.66) 0.45 0.655
group (4) -10.50 0.78 (-12.02, -8.98) -13.50 < .001
YEAR * group (2) 8.66e-03 1.69e-04 (8.32e-03, 8.99e-03) 51.07 < .001
YEAR * group (3) 6.76e-05 1.16e-04 (-1.59e-04, 2.95e-04) 0.58 0.559
YEAR * group (4) 4.98e-03 3.61e-04 (4.27e-03, 5.68e-03) 13.79 < .001
Random Effects
SD (Intercept: SALID1) 0.96
Código
parameters(model_4_poisson, exponentiate = TRUE) %>% print_html()
Model Summary
Parameter Coefficient SE 95% CI z p
Fixed Effects
(Intercept) 3.27e-13 5.70e-14 (2.32e-13, 4.60e-13) -165.04 < .001
YEAR 1.02 7.53e-05 (1.02, 1.02) 245.19 < .001
group (2) 1.82e-08 6.79e-09 (8.76e-09, 3.78e-08) -47.76 < .001
group (3) 1.13 0.31 (0.66, 1.94) 0.45 0.655
group (4) 2.75e-05 2.14e-05 (6.00e-06, 1.26e-04) -13.50 < .001
YEAR * group (2) 1.01 1.71e-04 (1.01, 1.01) 51.07 < .001
YEAR * group (3) 1.00 1.16e-04 (1.00, 1.00) 0.58 0.559
YEAR * group (4) 1.00 3.63e-04 (1.00, 1.01) 13.79 < .001
Random Effects
SD (Intercept: SALID1) 0.96

6.2 Modelo 2 - Con offset

6.2.1 Estimación

Estimación de los modelos con la información poblacional como offset

Código
model_3_poisson_off <- glmer(
  deaths ~ YEAR * group + (1|SALID1)  + offset(log(population_imp)), 
  data = gbmt_log_merged %>% filter(Clusters == 3),
  family = poisson(link = "log")
)
Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
Model failed to converge with max|grad| = 0.00333122 (tol = 0.002, component 1)
Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
 - Rescale variables?;Model is nearly unidentifiable: large eigenvalue ratio
 - Rescale variables?
Código
performance(model_3_poisson_off) %>% print_html()
AIC AICc BIC R2_conditional R2_marginal ICC RMSE Sigma Score_log Score_spherical
1.26e+05 1.26e+05 1.26e+05 0.06 6.61e-03 0.05 554.63 1 -15.46 0.01
Código
parameters(model_3_poisson_off) %>% print_html()
Model Summary
Parameter Coefficient SE 95% CI z p
Fixed Effects
(Intercept) 5.66 0.19 (5.29, 6.03) 30.15 < .001
YEAR -5.37e-03 8.83e-05 (-5.54e-03, -5.19e-03) -60.79 < .001
group (2) -43.12 0.24 (-43.59, -42.66) -181.64 < .001
group (3) -36.90 0.48 (-37.84, -35.96) -76.97 < .001
YEAR * group (2) 0.02 1.12e-04 (0.02, 0.02) 193.27 < .001
YEAR * group (3) 0.02 2.33e-04 (0.02, 0.02) 79.05 < .001
Random Effects
SD (Intercept: SALID1) 0.51
Código
parameters(model_3_poisson_off, exponentiate = TRUE) %>% print_html()
Model Summary
Parameter Coefficient SE 95% CI z p
Fixed Effects
(Intercept) 286.84 53.84 (198.55, 414.39) 30.15 < .001
YEAR 0.99 8.78e-05 (0.99, 0.99) -60.79 < .001
group (2) 1.87e-19 4.44e-20 (1.17e-19, 2.98e-19) -181.64 < .001
group (3) 9.47e-17 4.54e-17 (3.70e-17, 2.42e-16) -76.97 < .001
YEAR * group (2) 1.02 1.15e-04 (1.02, 1.02) 193.27 < .001
YEAR * group (3) 1.02 2.38e-04 (1.02, 1.02) 79.05 < .001
Random Effects
SD (Intercept: SALID1) 0.51
Código
model_4_poisson_off <- glmer(
  deaths ~ YEAR * group + (1|SALID1) + offset(log(population_imp)), 
  data = gbmt_log_merged %>% filter(Clusters == 4),
  family = poisson(link = "log")
)
Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
 - Rescale variables?;Model is nearly unidentifiable: large eigenvalue ratio
 - Rescale variables?
Código
performance(model_4_poisson_off) %>% print_html()
AIC AICc BIC R2_conditional R2_marginal ICC RMSE Sigma Score_log Score_spherical
1.19e+05 1.19e+05 1.19e+05 0.06 6.75e-03 0.05 524.84 1 -14.64 9.99e-03
Código
parameters(model_4_poisson_off) %>% print_html()
Model Summary
Parameter Coefficient SE 95% CI z p
Fixed Effects
(Intercept) -33.92 0.16 (-34.22, -33.61) -216.68 < .001
YEAR 0.01 7.41e-05 (0.01, 0.01) 196.17 < .001
group (2) -19.43 0.35 (-20.11, -18.74) -55.48 < .001
group (3) 41.18 0.25 (40.70, 41.66) 167.28 < .001
group (4) 7.20 0.74 (5.75, 8.65) 9.72 < .001
YEAR * group (2) 9.64e-03 1.70e-04 (9.30e-03, 9.97e-03) 56.83 < .001
YEAR * group (3) -0.02 1.16e-04 (-0.02, -0.02) -178.48 < .001
YEAR * group (4) -3.74e-03 3.61e-04 (-4.45e-03, -3.03e-03) -10.35 < .001
Random Effects
SD (Intercept: SALID1) 0.51
Código
parameters(model_4_poisson_off, exponentiate = TRUE) %>% print_html()
Model Summary
Parameter Coefficient SE 95% CI z p
Fixed Effects
(Intercept) 1.86e-15 2.92e-16 (1.37e-15, 2.53e-15) -216.68 < .001
YEAR 1.01 7.52e-05 (1.01, 1.01) 196.17 < .001
group (2) 3.65e-09 1.28e-09 (1.84e-09, 7.25e-09) -55.48 < .001
group (3) 7.65e+17 1.88e+17 (4.72e+17, 1.24e+18) 167.28 < .001
group (4) 1337.47 991.02 (313.02, 5714.71) 9.72 < .001
YEAR * group (2) 1.01 1.71e-04 (1.01, 1.01) 56.83 < .001
YEAR * group (3) 0.98 1.14e-04 (0.98, 0.98) -178.48 < .001
YEAR * group (4) 1.00 3.60e-04 (1.00, 1.00) -10.35 < .001
Random Effects
SD (Intercept: SALID1) 0.51

Ingreso de los valores predictivos para los modelos de 3 y 4 clústers:

Código
gbmt_log_merged2 <- gbmt_log_merged %>% 
  group_nest(Clusters) %>% 
  mutate(
    data = map2(
      data, Clusters,
      ~ case_when(
        .y == 3 ~ mutate(
          .x, 
          predicted_deaths = predict(model_3_poisson_off, type = "response"),
        ),
        .y == 4 ~ mutate(
          .x, 
          predicted_deaths = predict(model_4_poisson_off, type = "response"),
        ),
      )
    )
  ) %>% 
  unnest()
Warning: `cols` is now required when using `unnest()`.
ℹ Please use `cols = c(data)`.
Código
gbmt_log_merged2 <- gbmt_log_merged2 %>% 
  mutate(
    predicted_deaths_100k = predicted_deaths / population_imp * 100000
  )

6.2.2 Gráfico

Debido a que se ha agregado a los interceptos de las ciudades como efecto aleatorio, entonces las estimaciones y predicciones van a variar de ciudad a ciudad. Para poder visualizarlo, se podría resumir los datos con su media.

Código
plot_poisson_offset <- gbmt_log_merged2 %>% 
  group_by(Clusters, group, YEAR) %>% 
  summarise(
    across(
      c(predicted_deaths_100k, 
        deaths_per_100k),
      mean
    )
  ) %>% 
  ungroup() %>% 
  ggplot(aes(x = YEAR, 
           y = predicted_deaths_100k,
           color = group)) + 
  geom_point(aes(y = deaths_per_100k), alpha = 0.5,
             position = position_jitter(h = 0.2)) +  # Datos observados
  geom_line() +
  labs(title = "Promedio de Muertes por 100,000 habitantes por Grupo",
       x = "Año", y = "Muertes esperadas por 100,000 habitantes", color = "Grupo") +
  facet_wrap(vars(Clusters)) +
  theme_minimal()
`summarise()` has grouped output by 'Clusters', 'group'. You can override using
the `.groups` argument.
Código
plot_poisson_offset

Código
ggsave(
  plot_poisson_offset,
  filename = "02_output/plots/promedio_muertes_group_poisson.png",
  dpi = 300,
  bg = "white",
  width = 7,
  height = 4.5
)
Código
plot_poisson_offset_g <- gbmt_log_merged2 %>% 
  ggplot(aes(x = YEAR, 
           y = predicted_deaths_100k,
           color = group)) + 
  geom_point(aes(y = deaths_per_100k), alpha = 0.5,
             position = position_jitter(h = 0.2)) +  # Datos observados
  geom_line(aes(group = SALID1)) +
  labs(title = "Promedio de Muertes por 100,000 habitantes por Grupo",
       x = "Año", y = "Muertes esperadas por 100,000 habitantes", color = "Grupo") +
  facet_grid(vars(Clusters), vars(group)) +
  # facet_wrap(vars(Clusters)) +
  theme_minimal()

plot_poisson_offset_g

Código
ggsave(
  plot_poisson_offset_g,
  filename = "02_output/plots/promedio_muertes_group_poisson_g.png",
  dpi = 300,
  bg = "white",
  width = 7,
  height = 4.5
)

6.3 Modelo 3 - Con offset sin efectos aleatorios

6.3.1 Estimación

Las estimaciones no consideran variaciones entre las ciudades

Código
model_3_poisson_off2 <- glm(
  deaths ~ YEAR * group + offset(log(population_imp)), 
  data = gbmt_log_merged %>% filter(Clusters == 3),
  family = poisson(link = "log")
)

performance(model_3_poisson_off2) %>% print_html()
AIC AICc BIC R2_Nagelkerke RMSE Sigma Score_log Score_spherical
5.12e+06 5.12e+06 5.12e+06 1 5381.53 1 -Inf 2.93e-03
Código
parameters(model_3_poisson_off2) %>% print_html()
Parameter Coefficient SE 95% CI z p
(Intercept) 20.91 0.17 (20.57, 21.25) 120.50 < .001
YEAR -0.01 8.64e-05 (-0.01, -0.01) -150.15 < .001
group (2) -83.35 0.22 (-83.78, -82.93) -381.40 < .001
group (3) -58.38 0.46 (-59.28, -57.48) -127.09 < .001
YEAR * NA 0.04 1.09e-04 (0.04, 0.04) 384.71 < .001
YEAR * NA 0.03 2.29e-04 (0.03, 0.03) 127.58 < .001
Código
parameters(model_3_poisson_off2, exponentiate = TRUE) %>% print_html()
Parameter Coefficient SE 95% CI z p
(Intercept) 1.21e+09 2.10e+08 (8.62e+08, 1.70e+09) 120.50 < .001
YEAR 0.99 8.53e-05 (0.99, 0.99) -150.15 < .001
group (2) 6.30e-37 1.38e-37 (4.11e-37, 9.67e-37) -381.40 < .001
group (3) 4.41e-26 2.02e-26 (1.79e-26, 1.08e-25) -127.09 < .001
YEAR * NA 1.04 1.13e-04 (1.04, 1.04) 384.71 < .001
YEAR * NA 1.03 2.36e-04 (1.03, 1.03) 127.58 < .001
Código
model_4_poisson_off2 <- glm(
  deaths ~ YEAR * group + offset(log(population_imp)), 
  data = gbmt_log_merged %>% filter(Clusters == 4),
  family = poisson(link = "log")
)
performance(model_4_poisson_off2) %>% print_html()
AIC AICc BIC R2_Nagelkerke RMSE Sigma Score_log Score_spherical
5.19e+06 5.19e+06 5.19e+06 1 5419.27 1 -Inf 3.19e-03
Código
parameters(model_4_poisson_off2) %>% print_html()
Parameter Coefficient SE 95% CI z p
(Intercept) -61.03 0.14 (-61.31, -60.75) -430.71 < .001
YEAR 0.03 7.06e-05 (0.03, 0.03) 399.46 < .001
group (2) -4.04 0.33 (-4.69, -3.39) -12.18 < .001
group (3) 83.26 0.23 (82.82, 83.70) 369.19 < .001
group (4) 33.79 0.71 (32.39, 35.19) 47.27 < .001
YEAR * NA 1.96e-03 1.65e-04 (1.63e-03, 2.28e-03) 11.84 < .001
YEAR * NA -0.04 1.12e-04 (-0.04, -0.04) -372.41 < .001
YEAR * NA -0.02 3.56e-04 (-0.02, -0.02) -47.89 < .001
Código
parameters(model_4_poisson_off2, exponentiate = TRUE) %>% print_html()
Parameter Coefficient SE 95% CI z p
(Intercept) 3.12e-27 4.43e-28 (2.37e-27, 4.12e-27) -430.71 < .001
YEAR 1.03 7.26e-05 (1.03, 1.03) 399.46 < .001
group (2) 0.02 5.83e-03 (9.17e-03, 0.03) -12.18 < .001
group (3) 1.45e+36 3.26e+35 (9.29e+35, 2.25e+36) 369.19 < .001
group (4) 4.74e+14 3.39e+14 (1.17e+14, 1.92e+15) 47.27 < .001
YEAR * NA 1.00 1.66e-04 (1.00, 1.00) 11.84 < .001
YEAR * NA 0.96 1.08e-04 (0.96, 0.96) -372.41 < .001
YEAR * NA 0.98 3.50e-04 (0.98, 0.98) -47.89 < .001

Ingreso de las predicciones en el df:

Código
gbmt_log_merged3 <- gbmt_log_merged %>% 
  group_nest(Clusters) %>% 
  mutate(
    data = map2(
      data, Clusters,
      ~ case_when(
        .y == 3 ~ mutate(
          .x, 
          predicted_deaths = predict(model_3_poisson_off2, type = "response"),
        ),
        .y == 4 ~ mutate(
          .x, 
          predicted_deaths = predict(model_4_poisson_off2, type = "response"),
        ),
      )
    )
  ) %>% 
  unnest(cols = c(data))

gbmt_log_merged3 <- gbmt_log_merged3 %>% 
  mutate(
    predicted_deaths_100k = predicted_deaths / population_imp * 100000
  )

6.3.2 Gráfico

Gráfico de los datos resumidos:

Código
plot_poisson2_offset <- gbmt_log_merged3 %>% 
  group_by(Clusters, group, YEAR) %>%
  summarise(
    across(
      c(predicted_deaths_100k,
        deaths_per_100k),
      mean
    )
  ) %>%
  ungroup() %>%
  ggplot(aes(x = YEAR, 
           y = predicted_deaths_100k,
           color = group)) + 
  geom_point(aes(y = deaths_per_100k), alpha = 0.5,
             position = position_jitter(h = 0.2)) +  # Datos observados
  geom_line(linewidth = 1) +
  labs(title = "Promedio de Muertes por 100,000 habitantes por Grupo a lo largo del Tiempo",
       x = "Año", y = "Muertes esperadas por 100,000 habitantes", color = "Grupo") +
  facet_wrap(vars(Clusters)) +
  theme_minimal()
`summarise()` has grouped output by 'Clusters', 'group'. You can override using
the `.groups` argument.
Código
plot_poisson2_offset

Código
ggsave(
  plot_poisson2_offset,
  filename = "02_output/plots/promedio_muertes_group_poisson2.png",
  dpi = 300,
  bg = "white",
  width = 7,
  height = 4.5
)

Sin resumir:

Código
plot_poisson2_offset_g <- gbmt_log_merged2 %>% 
  ggplot(aes(x = YEAR, 
           y = predicted_deaths_100k,
           color = group)) + 
  geom_point(aes(y = deaths_per_100k), alpha = 0.5,
             position = position_jitter(h = 0.2)) +  # Datos observados
  geom_line(linewidth = 1) +
  labs(title = "Promedio de Muertes por 100,000 habitantes por Grupo a lo largo del Tiempo",
       x = "Año", y = "Muertes esperadas por 100,000 habitantes", color = "Grupo") +
  facet_wrap(vars(Clusters)) +
  theme_minimal()

plot_poisson2_offset_g

Código
ggsave(
  plot_poisson2_offset_g,
  filename = "02_output/plots/plot_poisson2_offset_g.png",
  dpi = 300,
  bg = "white",
  width = 7,
  height = 4.5
)

6.4 Modelo 4 - Con offset Binomial negativa

6.4.1 Estimación

Estimación de los modelos con la información poblacional como offset

Código
model_3_binomial_neg <- glmmTMB(
  deaths ~ YEAR * group + (1|SALID1) + offset(log(population_imp)), 
  data = gbmt_log_merged %>% filter(Clusters == 3),
  family = nbinom2(link = "log")
)


performance(model_3_binomial_neg) %>% print_html()
AIC AICc BIC R2_conditional R2_marginal ICC RMSE Sigma Score_log Score_spherical
54551.00 54551.03 54601.32 0.06 6.60e-03 0.05 657.94 119.23 -8.07 0.01
Código
parameters(model_3_binomial_neg) %>% print_html()
Model Summary
Parameter Coefficient SE 95% CI z p
Fixed Effects
(Intercept) -6.35 1.21 (-8.72, -3.98) -5.25 < .001
YEAR 6.19e-04 6.02e-04 (-5.61e-04, 1.80e-03) 1.03 0.304
group (2) -34.61 1.49 (-37.54, -31.68) -23.16 < .001
group (3) -25.94 2.01 (-29.88, -21.99) -12.88 < .001
YEAR * NA 0.02 7.43e-04 (0.02, 0.02) 23.45 < .001
YEAR * NA 0.01 1.00e-03 (0.01, 0.01) 12.97 < .001
Dispersion
(Intercept) 119.23 (113.47, 125.28)
Random Effects Variances
SD (Intercept: SALID1) 0.51
Código
parameters(model_3_binomial_neg, exponentiate = TRUE) %>% print_html()
Model Summary
Parameter Coefficient SE 95% CI z p
Fixed Effects
(Intercept) 1.75e-03 2.11e-03 (1.63e-04, 0.02) -5.25 < .001
YEAR 1.00 6.02e-04 (1.00, 1.00) 1.03 0.304
group (2) 9.32e-16 1.39e-15 (4.98e-17, 1.74e-14) -23.16 < .001
group (3) 5.45e-12 1.10e-11 (1.05e-13, 2.82e-10) -12.88 < .001
YEAR * NA 1.02 7.56e-04 (1.02, 1.02) 23.45 < .001
YEAR * NA 1.01 1.01e-03 (1.01, 1.02) 12.97 < .001
Dispersion
(Intercept) 119.23 (113.47, 125.28)
Random Effects Variances
SD (Intercept: SALID1) 0.51
Código
model_4_binomial_neg <- glmmTMB(
  deaths ~ YEAR * group + (1|SALID1) + offset(log(population_imp)), 
  data = gbmt_log_merged %>% filter(Clusters == 4),
  family = nbinom2(link = "log")
)


performance(model_4_binomial_neg) %>% print_html()
AIC AICc BIC R2_conditional R2_marginal ICC RMSE Sigma Score_log Score_spherical
54411.37 54411.43 54474.28 0.06 6.57e-03 0.05 603.31 124.48 -8.07 0.01
Código
parameters(model_4_binomial_neg) %>% print_html()
Model Summary
Parameter Coefficient SE 95% CI z p
Fixed Effects
(Intercept) -36.46 0.96 (-38.35, -34.57) -37.79 < .001
YEAR 0.02 4.80e-04 (0.01, 0.02) 32.93 < .001
group (2) -11.73 1.64 (-14.94, -8.51) -7.15 < .001
group (3) 32.75 1.54 (29.72, 35.78) 21.21 < .001
group (4) 8.79 2.89 (3.12, 14.45) 3.04 0.002
YEAR * NA 5.80e-03 8.16e-04 (4.20e-03, 7.40e-03) 7.11 < .001
YEAR * NA -0.02 7.68e-04 (-0.02, -0.02) -21.50 < .001
YEAR * NA -4.53e-03 1.44e-03 (-7.35e-03, -1.71e-03) -3.15 0.002
Dispersion
(Intercept) 124.48 (118.44, 130.83)
Random Effects Variances
SD (Intercept: SALID1) 0.51
Código
parameters(model_4_binomial_neg, exponentiate = TRUE) %>% print_html()
Model Summary
Parameter Coefficient SE 95% CI z p
Fixed Effects
(Intercept) 1.47e-16 1.41e-16 (2.21e-17, 9.72e-16) -37.79 < .001
YEAR 1.02 4.88e-04 (1.01, 1.02) 32.93 < .001
group (2) 8.08e-06 1.32e-05 (3.24e-07, 2.01e-04) -7.15 < .001
group (3) 1.67e+14 2.58e+14 (8.11e+12, 3.45e+15) 21.21 < .001
group (4) 6542.47 18921.32 (22.59, 1.89e+06) 3.04 0.002
YEAR * NA 1.01 8.21e-04 (1.00, 1.01) 7.11 < .001
YEAR * NA 0.98 7.55e-04 (0.98, 0.99) -21.50 < .001
YEAR * NA 1.00 1.43e-03 (0.99, 1.00) -3.15 0.002
Dispersion
(Intercept) 124.48 (118.44, 130.83)
Random Effects Variances
SD (Intercept: SALID1) 0.51

Ingreso de los valores predictivos para los modelos de 3 y 4 clústers:

Código
gbmt_log_merged4 <- gbmt_log_merged %>% 
  group_nest(Clusters) %>% 
  mutate(
    data = map2(
      data, Clusters,
      ~ case_when(
        .y == 3 ~ mutate(
          .x, 
          predicted_deaths = predict(model_3_binomial_neg, type = "response"),
        ),
        .y == 4 ~ mutate(
          .x, 
          predicted_deaths = predict(model_4_binomial_neg, type = "response"),
        ),
      )
    )
  ) %>% 
  unnest()
Warning: `cols` is now required when using `unnest()`.
ℹ Please use `cols = c(data)`.
Código
gbmt_log_merged4 <- gbmt_log_merged4 %>% 
  mutate(
    predicted_deaths_100k = predicted_deaths / population_imp * 100000
  )

6.4.2 Gráfico

Debido a que se ha agregado a los interceptos de las ciudades como efecto aleatorio, entonces las estimaciones y predicciones van a variar de ciudad a ciudad. Para poder visualizarlo, se podría resumir los datos con su media.

Código
plot_binomialoffset <- gbmt_log_merged4 %>% 
  group_by(Clusters, group, YEAR) %>% 
  summarise(
    across(
      c(predicted_deaths_100k, 
        deaths_per_100k),
      mean
    )
  ) %>% 
  ungroup() %>% 
  ggplot(aes(x = YEAR, 
           y = predicted_deaths_100k,
           color = group)) + 
  geom_point(aes(y = deaths_per_100k), alpha = 0.5,
             position = position_jitter(h = 0.2)) +  # Datos observados
  geom_line() +
  labs(title = "Promedio de Muertes por 100,000 habitantes por Grupo",
       x = "Año", y = "Muertes esperadas por 100,000 habitantes", color = "Grupo") +
  facet_wrap(vars(Clusters)) +
  theme_minimal()
`summarise()` has grouped output by 'Clusters', 'group'. You can override using
the `.groups` argument.
Código
plot_binomialoffset

Código
ggsave(
  plot_binomialoffset,
  filename = "02_output/plots/promedio_muertes_group_binomial.png",
  dpi = 300,
  bg = "white",
  width = 7,
  height = 4.5
)
Código
plot_binomialoffset_g <- gbmt_log_merged4 %>% 
  ggplot(aes(x = YEAR, 
           y = predicted_deaths_100k,
           color = group)) + 
  geom_point(aes(y = deaths_per_100k), alpha = 0.5,
             position = position_jitter(h = 0.2)) +  # Datos observados
  geom_line(aes(group = SALID1)) +
  labs(title = "Promedio de Muertes por 100,000 habitantes por Grupo",
       x = "Año", y = "Muertes esperadas por 100,000 habitantes", color = "Grupo") +
  facet_grid(vars(Clusters), vars(group)) +
  # facet_wrap(vars(Clusters)) +
  theme_minimal()

plot_binomialoffset_g

Código
ggsave(
  plot_binomialoffset_g,
  filename = "02_output/plots/promedio_muertes_group_binomial_g.png",
  dpi = 300,
  bg = "white",
  width = 7,
  height = 4.5
)

6.5 Modelo 5 - Con offset sin efectos aleatorios - Binomial Negativo

6.5.1 Estimación

Las estimaciones no consideran variaciones entre las ciudades

Código
model_3_binomial_neg2 <- glm.nb(
  deaths ~ YEAR * group + offset(log(population_imp)), 
  data = gbmt_log_merged %>% filter(Clusters == 3)
)


performance(model_3_binomial_neg2) %>% print_html()
AIC AICc BIC R2_Nagelkerke RMSE Sigma Score_log Score_spherical
67094.71 67094.74 67138.74 0.17 5584.64 1 -8.56 0.01
Código
parameters(model_3_binomial_neg2) %>% print_html()
Parameter Coefficient SE 95% CI z p
(Intercept) -29.08 6.70 (-42.16, -15.98) -4.34 < .001
YEAR 0.01 3.34e-03 (5.51e-03, 0.02) 3.60 < .001
group (2) -28.37 8.26 (-44.50, -12.25) -3.43 < .001
group (3) -18.24 11.10 (-39.93, 3.45) -1.64 0.100
YEAR * NA 0.01 4.11e-03 (6.26e-03, 0.02) 3.47 < .001
YEAR * NA 9.10e-03 5.53e-03 (-1.71e-03, 0.02) 1.65 0.100
Código
parameters(model_3_binomial_neg2, exponentiate = TRUE) %>% print_html()
Parameter Coefficient SE 95% CI z p
(Intercept) 2.36e-13 1.58e-12 (4.92e-19, 1.15e-07) -4.34 < .001
YEAR 1.01 3.38e-03 (1.01, 1.02) 3.60 < .001
group (2) 4.78e-13 3.94e-12 (4.72e-20, 4.79e-06) -3.43 < .001
group (3) 1.20e-08 1.33e-07 (4.57e-18, 31.58) -1.64 0.100
YEAR * NA 1.01 4.17e-03 (1.01, 1.02) 3.47 < .001
YEAR * NA 1.01 5.58e-03 (1.00, 1.02) 1.65 0.100
Código
model_4_binomial_neg2 <- glm.nb(
  deaths ~ YEAR * group + offset(log(population_imp)), 
  data = gbmt_log_merged %>% filter(Clusters == 4)
)


performance(model_4_binomial_neg2) %>% print_html()
AIC AICc BIC R2_Nagelkerke RMSE Sigma Score_log Score_spherical
67142.72 67142.77 67199.33 0.15 5678.67 1 -8.59 0.01
Código
parameters(model_4_binomial_neg2) %>% print_html()
Parameter Coefficient SE 95% CI z p
(Intercept) -48.65 5.45 (-59.28, -38.00) -8.92 < .001
YEAR 0.02 2.72e-03 (0.02, 0.03) 8.07 < .001
group (2) -25.85 9.24 (-43.90, -7.80) -2.80 0.005
group (3) 22.86 8.76 (5.77, 39.96) 2.61 0.009
group (4) 12.29 16.32 (-19.60, 44.22) 0.75 0.451
YEAR * NA 0.01 4.60e-03 (3.89e-03, 0.02) 2.80 0.005
YEAR * NA -0.01 4.37e-03 (-0.02, -3.02e-03) -2.64 0.008
YEAR * NA -6.29e-03 8.13e-03 (-0.02, 9.61e-03) -0.77 0.439
Código
parameters(model_4_binomial_neg2, exponentiate = TRUE) %>% print_html()
Parameter Coefficient SE 95% CI z p
(Intercept) 7.47e-22 4.07e-21 (1.80e-26, 3.12e-17) -8.92 < .001
YEAR 1.02 2.78e-03 (1.02, 1.03) 8.07 < .001
group (2) 5.92e-12 5.47e-11 (8.62e-20, 4.08e-04) -2.80 0.005
group (3) 8.48e+09 7.43e+10 (320.34, 2.26e+17) 2.61 0.009
group (4) 2.18e+05 3.56e+06 (3.06e-09, 1.61e+19) 0.75 0.451
YEAR * NA 1.01 4.66e-03 (1.00, 1.02) 2.80 0.005
YEAR * NA 0.99 4.31e-03 (0.98, 1.00) -2.64 0.008
YEAR * NA 0.99 8.08e-03 (0.98, 1.01) -0.77 0.439

Ingreso de las predicciones en el df:

Código
gbmt_log_merged5 <- gbmt_log_merged %>% 
  group_nest(Clusters) %>% 
  mutate(
    data = map2(
      data, Clusters,
      ~ case_when(
        .y == 3 ~ mutate(
          .x, 
          predicted_deaths = predict(model_3_binomial_neg2, type = "response"),
        ),
        .y == 4 ~ mutate(
          .x, 
          predicted_deaths = predict(model_4_binomial_neg2, type = "response"),
        ),
      )
    )
  ) %>% 
  unnest(cols = c(data))

gbmt_log_merged5 <- gbmt_log_merged5 %>% 
  mutate(
    predicted_deaths_100k = predicted_deaths / population_imp * 100000
  )

6.5.2 Gráfico

Gráfico de los datos resumidos:

Código
plot_binomial2offset <- gbmt_log_merged5 %>% 
  group_by(Clusters, group, YEAR) %>%
  summarise(
    across(
      c(predicted_deaths_100k,
        deaths_per_100k),
      mean
    )
  ) %>%
  ungroup() %>%
  ggplot(aes(x = YEAR, 
           y = predicted_deaths_100k,
           color = group)) + 
  geom_point(aes(y = deaths_per_100k), alpha = 0.5,
             position = position_jitter(h = 0.2)) +  # Datos observados
  geom_line(linewidth = 1) +
  labs(title = "Promedio de Muertes por 100,000 habitantes por Grupo a lo largo del Tiempo",
       x = "Año", y = "Muertes esperadas por 100,000 habitantes", color = "Grupo") +
  facet_wrap(vars(Clusters)) +
  theme_minimal()
`summarise()` has grouped output by 'Clusters', 'group'. You can override using
the `.groups` argument.
Código
plot_binomial2offset

Código
ggsave(
  plot_binomial2offset,
  filename = "02_output/plots/promedio_muertes_group_binomial2.png",
  dpi = 300,
  bg = "white",
  width = 7,
  height = 4.5
)

Sin resumir:

Código
plot_binomial2offset_g <- gbmt_log_merged5 %>% 
  ggplot(aes(x = YEAR, 
           y = predicted_deaths_100k,
           color = group)) + 
  geom_point(aes(y = deaths_per_100k), alpha = 0.5,
             position = position_jitter(h = 0.2)) +  # Datos observados
  geom_line(linewidth = 1) +
  labs(title = "Promedio de Muertes por 100,000 habitantes por Grupo a lo largo del Tiempo",
       x = "Año", y = "Muertes esperadas por 100,000 habitantes", color = "Grupo") +
  facet_wrap(vars(Clusters)) +
  theme_minimal()
Código
ggsave(
  plot_binomial2offset_g,
  filename = "02_output/plots/promedio_muertes_group_binomial2_g.png",
  dpi = 300,
  bg = "white",
  width = 7,
  height = 4.5
)

7 Mapa

Código
library(sf)
Linking to GEOS 3.12.0, GDAL 3.8.4, PROJ 9.3.1; sf_use_s2() is TRUE
Código
library(biscale)
Código
salid1_pubsalid1 <- readr::read_csv("01_data/raw/SALID1_PUBSALID1.csv")
Rows: 371 Columns: 2
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
dbl (2): SALID1, PUBSALID1

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Código
gdb_path <- "01_data/raw/Data Request MS242_09222023/MS242_L1.gdb"
gdb_salurbal <- st_layers(gdb_path)

# cities_l1ad <- st_read(
#   gdb_path,
#   gdb_salurbal$name[[1]]
# ) %>%
#   as_tibble() %>%
#   st_as_sf() %>%
#   left_join(
#     salid1_pubsalid1
#   ) %>%
#   relocate(SALID1, .after = PUBSALID1)

cities_l1ad_noislands <- st_read(
  gdb_path,
  gdb_salurbal$name[[2]]
) %>%
  as_tibble() %>%
  st_as_sf() %>%
  left_join(
    salid1_pubsalid1
  ) %>%
  relocate(SALID1, .after = PUBSALID1)
Reading layer `SALURBAL_L1AD_NoSmallIslands_PublicIDs20230922' from data source 
  `/home/brian/data/Github/salurbal_2/01_data/raw/Data Request MS242_09222023/MS242_L1.gdb' 
  using driver `OpenFileGDB'
Warning in CPL_read_ogr(dsn, layer, query, as.character(options), quiet, : GDAL
Message 1: organizePolygons() received a polygon with more than 100 parts. The
processing may be really slow.  You can skip the processing by setting
METHOD=SKIP, or only make it analyze counter-clock wise parts by setting
METHOD=ONLY_CCW if you can assume that the outline of holes is counter-clock
wise defined
Simple feature collection with 371 features and 10 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: -117.1243 ymin: -54.28742 xmax: -34.79288 ymax: 32.71865
Geodetic CRS:  WGS 84
Joining with `by = join_by(PUBSALID1)`
Código
get_shp <- function(ISO2) {
  # Obtener shp_1
  shp_0 <- geodata::gadm(
    ISO2,
    path = "01_data/raw/shp/",
    level = 0
  ) %>%
    st_as_sf()
  
  # Obtener shp_1
  shp_1 <- geodata::gadm(
    ISO2,
    path = "01_data/raw/shp/",
    level = 1
  ) %>%
    st_as_sf()

  # Obtener shp_2 y aplicar las transformaciones
  shp_2 <- cities_l1ad_noislands %>%
    filter(COUNTRYAB == ISO2) %>%
    janitor::clean_names()

  # Devolver la lista con shp_1 y shp_2
  list(
    shp_0 = shp_0,
    shp_1 = shp_1,
    shp_2 = shp_2
  )
}


shp_list <- unique(gbmt_fit_final$gbmt_fit_total[[1]]$ISO2) %>%
  append(
    c("GUY", "SUR", "BOL", "PRY",
      "URY", "ECU", "BLZ", "HND",
      "TTO", "VEN")
  ) %>% 
  set_names() %>%
  map(get_shp)
Código
join_data_shp3 <- function(data, shp_list) {
  # Proceso de unión con shapefiles
  data %>%
    mutate(
      pubsalid1 = as.numeric(as.character(pubsalid1)),
      # City = str_to_lower(City),
      group = factor(group)
    ) %>%
    group_nest(ISO2) %>%
    drop_na(ISO2) %>%
    mutate(
      # country = str_to_title(country),
      shp_1 = map(ISO2, ~ shp_list[[.x]]$shp_1),
      shp_2 = map(ISO2, ~ shp_list[[.x]]$shp_2),
      shp_inner = map2(
        data,
        shp_2,
        ~ inner_join(
          .x,
          .y,
          by = join_by(pubsalid1)
        ) %>%
          st_as_sf()
      )
    )
}
Código
gbmt_interest_deaths <- gbmt_log_merged %>% 
  filter(Clusters == 4) %>% 
  group_by(
    across(c(ISO2:SALID1, group))
  ) %>% 
  summarise(
    mean_deaths_per_100k = mean(deaths_per_100k, na.rm = TRUE)
  ) %>% 
  ungroup()
`summarise()` has grouped output by 'ISO2', 'pubsalid1', 'SALID1'. You can
override using the `.groups` argument.
Código
gbmt_interest_deaths_shp <- map(
  list(gbmt_interest_deaths),
  ~ join_data_shp3(.x, shp_list)
)

city_data_deaths <- gbmt_interest_deaths_shp[[1]] %>% 
  select(ISO2, shp_inner) %>% 
  unnest(cols = shp_inner) %>%
  st_as_sf() %>%
  st_centroid()
Warning: st_centroid assumes attributes are constant over geometries
Código
country_data <- map(shp_list, 1) %>% 
  bind_rows() %>% 
  mutate(
    study = c(rep(TRUE, 11), rep(FALSE, 10))
  )

countries_by_study <- country_data %>% 
  as_tibble() %>% 
  select(GID_0, study)

level1_data <- map(shp_list, 2) %>% 
  bind_rows() %>% 
  left_join(countries_by_study)
Joining with `by = join_by(GID_0)`
Código
city_data_deaths_biscale <- bi_class(city_data_deaths,
  x = mean_deaths_per_100k,
  y = group,
  style = "quantile", dim = 4
)

city_data_deaths %>% 
 mutate(a = ntile(mean_deaths_per_100k, 4)) %>% 
 group_by(group, a) %>% 
 summarise(min = min(mean_deaths_per_100k), max = max(mean_deaths_per_100k))
`summarise()` has grouped output by 'group'. You can override using the
`.groups` argument.
Simple feature collection with 16 features and 4 fields
Geometry type: GEOMETRY
Dimension:     XY
Bounding box:  xmin: -116.493 ymin: -53.49712 xmax: -34.9476 ymax: 32.40971
Geodetic CRS:  WGS 84
# A tibble: 16 × 5
# Groups:   group [4]
   group     a   min   max                                                 Shape
   <fct> <int> <dbl> <dbl>                                        <GEOMETRY [°]>
 1 1         1  416.  560. MULTIPOINT ((-43.29996 -19.60061), (-40.54615 -20.58…
 2 1         2  561.  711. MULTIPOINT ((-44.93073 -20.12369), (-43.78866 -20.66…
 3 1         3  715.  997. MULTIPOINT ((-42.50153 -22.31982), (-42.87377 -22.31…
 4 1         4 1004. 4092. MULTIPOINT ((-42.10004 -22.77696), (-42.36581 -22.80…
 5 2         1  313.  549. MULTIPOINT ((-39.29 -16.61899), (-43.2905 -4.86208),…
 6 2         2  567.  704. MULTIPOINT ((-36.64182 -9.749282), (-36.50114 -8.929…
 7 2         3  718.  998. MULTIPOINT ((-41.38362 -17.7145), (-35.29258 -8.1541…
 8 2         4 1009. 7292. MULTIPOINT ((-39.42183 -7.277126), (-114.9182 30.042…
 9 3         1  145.  558. MULTIPOINT ((-41.97768 -22.29566), (-44.353 -22.9866…
10 3         2  561.  687. MULTIPOINT ((-44.25334 -19.4369), (-101.3684 20.6997…
11 3         3  736.  984. MULTIPOINT ((-34.9476 -7.102349), (-116.493 32.40971…
12 3         4 1096. 3878. MULTIPOINT ((-40.38524 -20.27279), (-99.67373 27.475…
13 4         1  395.  547. MULTIPOINT ((-114.3506 32.01702), (-104.8691 21.5672…
14 4         2  563.  631. MULTIPOINT ((-77.07337 3.593666), (-70.04117 -20.777…
15 4         3  973.  973.                            POINT (-102.9961 23.23687)
16 4         4 1074. 1169. MULTIPOINT ((-35.80997 -9.567409), (-115.2819 31.897…
Código
custom_pal <- c(
  "1-1" = "#e6ccf5",  # Morado muy claro
  "2-1" = "#c98bd9",  # Morado claro intensificado
  "3-1" = "#8a5599",  # Morado oscuro
  "4-1" = "#562f61",  # Morado muy oscuro
  "1-2" = "#b9f2a1",  # Verde muy claro
  "2-2" = "#6ed965",  # Verde claro intensificado
  "3-2" = "#348c3f",  # Verde oscuro
  "4-2" = "#1e5630",  # Verde muy oscuro
  "1-3" = "#a3e4fd",  # Azul celeste muy claro
  "2-3" = "#5cb8e6",  # Azul claro intensificado
  "3-3" = "#2d6899",  # Azul oscuro
  "4-3" = "#1a4467",  # Azul marino muy oscuro
  "1-4" = "#ffaba8",  # Coral muy claro
  "2-4" = "#ff5747",  # Rojo claro intensificado
  "3-4" = "#c7322c",  # Rojo oscuro
  "4-4" = "#8b1b15"   # Burdeos muy oscuro
)




ggmap <- level1_data %>%
  ggplot() +
  geom_sf(
    aes(fill = study),
    linewidth = 0.05
  ) +
  scale_fill_manual(
    values = c("#bdbdbd", "#e5e5e5"),
    guide = "none"
  ) + 
  geom_sf(
    data = country_data,
    linewidth = 0.35, 
    color = "#3f3f3f",
    fill = NA
  ) +
  geom_sf(
    data = city_data_deaths_biscale,
    mapping = aes(color = bi_class),
    #color = "white", 
    size = 2.5,
    show.legend = FALSE
  ) +
  bi_scale_color(pal = custom_pal, dim = 4) +
  bi_theme()

legend <- bi_legend(
  pal = custom_pal,
  dim = 4,
  xlab = "Mortality per 100K inhabitants",
  ylab = "Cluster (1-4)",
  size = 5
)

x_range_map2 <- city_data_deaths_biscale %>% 
  separate_wider_delim(
    bi_class,
    delim = "-",
    names = c("X", "Y")
  ) %>% 
  group_by(X) %>% 
  summarise(
    min = min(mean_deaths_per_100k),
    max = max(mean_deaths_per_100k)
  ) %>% 
  mutate(
    across(
      min:max,
      round
    ),
    range = paste0("[", min, "; \n", max, "]")
  ) 

legend <- legend + 
  scale_x_continuous(
    labels = c(NA, x_range_map2$range, NA)
  ) +
  scale_y_continuous(
    labels = c(NA, 1:4, NA)
  ) + 
  theme(
    axis.text = element_text(),
    axis.title.y = element_text(
      margin = margin(r = 4)
    ),
    axis.title.x = element_text(
      margin = margin(t = 4)
    )
  )


library(cowplot)

finalPlot <- ggdraw(ggmap) +
  # draw_plot(ggmap, 0, 0, 1.5, 1.5) +
  draw_plot(legend, 0.15, 0.25, 0.27, 0.27)

finalPlot

Código
ggsave("ggmap2.png",
       finalPlot,
       height = 7,
       width = 7,
       dpi = 600)